options(warn=-1)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')
library(maps)
library(ggmap)
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:tidyr':
## 
##     extract
library(rgdal)
## rgdal: version: 1.2-8, (SVN revision 663)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-4
library(rgeos)
## rgeos version: 0.3-23, (SVN revision 546)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 r0 
##  Linking to sp version: 1.2-4 
##  Polygon checking: TRUE
# Read the terrorism event export; column names are kept verbatim
# (check.names = FALSE) and text columns stay character vectors.
terr <- read.csv(
  '~/Downloads/globalterrorismdb_0617dist.csv',
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Shorter column names used throughout the rest of the analysis.
terr <- terr %>%
  rename(
    id = eventid,
    year = iyear,
    nation = country_txt,
    Region = region_txt,
    attack = attacktype1_txt,
    target = targtype1_txt,
    weapon = weaptype1_txt,
    Killed = nkill,
    wounded = nwound
  )

0.1 Data cleaning

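We clean the data: the killed and wounded counts are converted to integers, missing counts are treated as zero, a total casualties column (killed + wounded) is added, and a few country names are replaced with shorter or alternative names.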

# Coerce the kill / wound counts to integer, then treat missing counts as 0.
terr$Killed <- as.integer(terr$Killed)
terr$wounded <- as.integer(terr$wounded)

terr$Killed[is.na(terr$Killed)] <- 0
terr$wounded[is.na(terr$wounded)] <- 0

# Total casualties per event = killed + wounded.
terr$casualties <- as.integer(terr$Killed + terr$wounded)

# Replace selected country names with alternative spellings.
# NOTE(review): presumably chosen to match names used by the mapping
# packages loaded above -- confirm.
nation_aliases <- c(
  "United States" = "USA",
  "United Kingdom" = "UK",
  "People's Republic of the Congo" = "Republic of Congo",
  "Bosnia-Herzegovina" = "Bosnia and Herzegovina",
  "Slovak Republic" = "Slovakia"
)
needs_alias <- terr$nation %in% names(nation_aliases)
terr$nation[needs_alias] <- unname(nation_aliases[terr$nation[needs_alias]])
# Number of attacks for every (year, nation, Region) combination.
# summarize() only peels off the last grouping level, so global_t is still
# a grouped tibble; the group_by() calls below replace that grouping.
global_t <-
  terr %>%
  group_by(year, nation, Region) %>%
  summarize(Total = n())

# Number of attacks per year, worldwide.
global_y <-
  global_t %>%
  group_by(year) %>%
  summarize(Total = sum(Total))

# Number of attacks per nation over all years, largest first.
global_attacks <-
  global_t %>%
  group_by(nation) %>%
  summarize(Total = sum(Total)) %>%
  arrange(desc(Total))

# Same table ordered by descending Total. The column is referenced explicitly
# instead of attach()/detach(), so the search path is never modified (and
# cannot be left modified if ordering fails).
global_n <- global_attacks[order(-global_attacks$Total), ]

Let’s look at how the number of terrorist attacks has changed over time.

# Line chart of the yearly number of attacks, rendered as an interactive
# plotly widget.
gy <- ggplot(global_y, mapping = aes(year, Total)) +
  geom_line(color = "red") +
  labs(
    x = "Year",
    y = "Number of attacks",
    title = "Number of global attacks over years"
  ) +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 45, vjust = 1)
  )

ggplotly(gy, width = 800, height = 480)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# People killed per year.
global_kills_years <-
  terr %>%
  group_by(year) %>%
  summarize(killed = sum(Killed))

# People wounded per year.
global_wound_years <-
  terr %>%
  group_by(year) %>%
  summarize(wounded = sum(wounded))

# One row per year with killed, wounded and the number of attacks (Total).
# Both joins name their key explicitly so the result cannot silently change
# if the inputs ever gain another column with a shared name; this also
# removes the "Joining, by = ..." message.
globe <-
  global_kills_years %>%
  inner_join(global_wound_years, by = "year") %>%
  inner_join(global_y, by = "year")
# Reshape to long form: one row per (year, effect), where effect is one of
# the non-year columns of globe.
df <- globe %>%
  melt("year") %>%
  rename(effect = variable)

# One line per effect over the years, rendered as an interactive widget.
gky <- ggplot(df, mapping = aes(x = year, y = value, color = effect)) +
  geom_line() +
  labs(
    x = "Year",
    y = "Count",
    title = "Number of people killed/wounded over years against attacks"
  ) +
  theme(
    panel.background = NULL,
    axis.text.x = element_text(angle = 45, vjust = 1)
  )

ggplotly(gky, width = 800, height = 450)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

A high peak in the number of people killed can be seen in 1984. In 2001, even though the number of terrorist attacks fell, the number of casualties peaked. The number of casualties then rose sharply from 2011 to 2015.

0.2 Attacks with the highest casualties (killed + wounded)

# Bar chart of the individual attacks with the most casualties.
# (The previous header comment described a per-nation weapon lookup that
# this code does not perform.)

# Recompute casualties so this chunk does not depend on earlier cleaning;
# any missing total is treated as 0.
terr$casualties <- as.integer(terr$Killed + terr$wounded)
terr$casualties[is.na(terr$casualties)] <- 0

# Keep the 10 rows with the largest casualties (top_n keeps ties) and plot
# them by their target1 value, largest first.
g_max_cas <- terr %>%
  top_n(10, casualties) %>%
  ggplot(mapping = aes(x = reorder(target1, -casualties), y = casualties, fill = target1)) +
  geom_bar(stat = 'identity') +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 50, vjust = 1)
  ) +
  # Axis label spelling corrected ("casulaties" -> "casualties").
  labs(
    x = "Target of attack",
    y = "Number of casualties",
    title = "Terrorist attacks with most casualties"
  )
ggplotly(g_max_cas)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

0.3 Terrorist groups whose attacks have led to the most casualties

# Total casualties attributed to each named group ('Unknown' excluded).
# The chart below is titled and labelled as casualties, so Total must be the
# sum of casualties; the previous n() counted attacks per group and left the
# selected casualties column unused.
gname_max_cas <- terr[c('gname', 'casualties')] %>%
  filter(gname != 'Unknown') %>%
  group_by(gname) %>%
  summarize(Total = sum(casualties))

# Bar chart of the 40 groups with the highest casualty totals, largest first.
g <- gname_max_cas %>%
  top_n(40, Total) %>%
  ggplot(mapping = aes(x = reorder(gname, -Total), y = Total, fill = gname)) +
  geom_bar(stat = 'identity') +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 50, vjust = 1)
  ) +
  # Axis label spelling corrected ("casulaties" -> "casualties").
  labs(
    x = "Terrorist group",
    y = "Number of casualties",
    title = "Terrorist groups with most casualties"
  )
ggplotly(g, width = 800, height = 450)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Let’s look at the 40 countries with the highest number of terrorist attacks, and the 40 countries with the lowest number of terrorist attacks.

# Bar chart of the 40 nations with the most attacks, largest first.
# The ranking column is passed to top_n() explicitly rather than relying on
# its "last column" default, which would change if global_n gained a column.
g2 <- global_n %>%
  top_n(40, Total) %>%
  ggplot(mapping = aes(x = reorder(nation, -Total), y = Total, fill = nation)) +
  geom_bar(stat = 'identity') +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 90, vjust = 1)
  ) +
  labs(
    x = "Countries",
    y = "Number of attacks",
    title = "Countries with most number of terrorist attacks"
  )
ggplotly(g2, width = 800, height = 450)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# Bar chart of the 40 nations with the fewest attacks, smallest first.
# A negative n makes top_n() keep the lowest-ranked rows; the ranking column
# is named explicitly rather than relying on the "last column" default.
g2 <- global_n %>%
  top_n(-40, Total) %>%
  ggplot(mapping = aes(x = reorder(nation, Total), y = Total, fill = nation)) +
  geom_bar(stat = 'identity') +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 90, vjust = 1)
  ) +
  labs(
    x = "Countries",
    y = "Number of attacks",
    title = "Countries with least number of terrorist attacks"
  )
ggplotly(g2, width = 800, height = 450)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

1 Relationships and inferences

We’ll take a look at the relationships between some parameters. These relationships, however, do not directly imply causation; further analysis would be needed to establish causation.

1.1 Casualties by region

# Box plot of casualties per event, one box per Region.
g1 <- ggplot(terr, aes(x = Region, y = casualties, fill = Region)) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45)
  )

# Zoom the y axis to the whisker range of the overall casualties distribution
# (first and fifth boxplot statistics), scaled by 5%; coord_cartesian() zooms
# without discarding the outlying points from the box statistics.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)

ggplotly(g2)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

We can see that Middle East & North Africa has a higher median number of casualties (2) than the other regions, matched only by Sub-Saharan Africa. The region with the least variation in the number of casualties is North America. However, it has many outliers, with the 9/11 attacks resulting in the highest number of casualties (8749).

1.2 Casualties by attack type

# Box plot of casualties per event, one box per attack type.
g1 <- ggplot(terr, aes(x = attack, y = casualties, fill = attack)) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45)
  )

# Zoom the y axis to the whisker range of the overall casualties distribution
# (first and fifth boxplot statistics), scaled by 5%.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)

ggplotly(g2, height = 500)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Leaving out the unknown attack types, the number of casualties varies most in the case of bombings/explosions. Hijackings and hostage takings have low variance in the number of casualties, with outliers as high as 8749 in the case of hijacking.

1.3 Casualties by weapon

# Box plot of casualties per event, one box per weapon type.
g1 <- ggplot(terr, aes(x = weapon, y = casualties, fill = weapon)) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45)
  )

# Zoom the y axis to the whisker range of the overall casualties distribution
# (first and fifth boxplot statistics), scaled by 5%.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)

ggplotly(g2, height = 500)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

The number of casualties caused by chemical weapons has been highly variable, with 25% of chemical attacks resulting in between 50 and 5513 casualties. There haven’t been any casualties caused by radiological weapons.

1.4 Casualties by target

# Box plot of casualties per event, one box per target type.
g1 <- ggplot(terr, aes(x = target, y = casualties, fill = target)) +
  geom_boxplot() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45)
  )

# Zoom the y axis to the whisker range of the overall casualties distribution
# (first and fifth boxplot statistics), scaled by 5%.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)

ggplotly(g2, height = 500)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Casualties related to transportation, military, and private citizens & property targets have high variance. Attacks on police and non-state militia generally lead to more than 1 casualty, with as many as 11 casualties in some cases.